#coding=utf-8
import urllib
import re
import os
 
# Base URL of the site to scrape; the driver loop at the bottom of the file
# appends "/internet/page/<n>" to it to build each page URL.
weburl = "https://www.vulbox.com/board"
# Directory the downloaded images are written to. getImg passes it to
# destDir, which creates it when it does not exist yet.
tardir = "/Users/haley/Downloads/36kr/"
 
def getHtml(url):
    """Download ``url`` and return the raw response body.

    Args:
        url: Address to fetch; passed unchanged to ``urllib.urlopen``.

    Returns:
        The result of ``read()`` on the opened response (the undecoded body).
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Release the underlying connection even when read() raises.
        page.close()
 
def destDir(path):
    """Make sure ``path`` exists as a directory and return it ready for
    filename concatenation.

    Args:
        path: Directory path, with or without a trailing separator.

    Returns:
        ``path`` ending in exactly one path separator, so callers can append
        a file name directly.

    Raises:
        OSError: If the directory is missing and cannot be created.
    """
    if not os.path.isdir(path):
        os.makedirs(path)
    # Joining with an empty component appends the platform's separator only
    # when it is missing. The previous hard-coded '\\' produced file names
    # starting with a literal backslash on non-Windows systems.
    return os.path.join(path, '')
 
def getSuffix(fileurl):
    """Return the text after the last '.' in ``fileurl``.

    When ``fileurl`` contains no '.', the whole string is returned.
    """
    _head, _dot, suffix = fileurl.rpartition('.')
    return suffix
 
def getImg(html,n):
    """Download every image URL found in ``html`` into the target directory.

    Each file is saved as ``<n>_<index>.<suffix>`` inside ``destDir(tardir)``,
    where ``index`` counts matches from 1 in the order they appear and
    ``suffix`` comes from ``getSuffix``.

    Args:
        html: Page source to scan for image URLs.
        n: Page number, used as the file-name prefix.

    Returns:
        None. Progress is printed for each downloaded image.
    """
    # `https?` accepts both http and https; the former `http.` demanded an
    # extra character after "http" and therefore skipped plain http URLs.
    # The dot before the extension is escaped so it only matches a real '.'.
    reg = r'(https?:[\S]*?\.(jpg|jpeg|png|gif|bmp|JPG|JPEG|PNG|GIF|BMP))'
    destPath = destDir(tardir)
    # findall yields (full_url, extension) tuples because of the two groups.
    for x, (imgurl, _ext) in enumerate(re.findall(reg, html), 1):
        filename = '%s%s_%s.%s' % (destPath, n, x, getSuffix(imgurl))
        urllib.urlretrieve(imgurl, filename)
        # Message is re-encoded to GBK before printing, as in the rest of
        # the script.
        print("完成 ".decode('UTF-8').encode('GBK') + imgurl)
 
# Driver: fetch each listing page and download the images it references.
# NOTE: an earlier comment said "first 5 pages", but range(1, 2) yields only
# page 1; widen it to range(1, 6) if five pages are actually wanted.
for n in range(1,2):
    pageurl = weburl + "/internet/page/" + str(n)
    html = getHtml(pageurl)
    # getImg returns nothing, so it is called for its side effects only
    # (printing its result just emitted "None").
    getImg(html,n)
    print("【完成页面】 ".decode('UTF-8').encode('GBK') + pageurl)

# Runs the shell command "pause" (a Windows console built-in) so the window
# stays open until a key is pressed.
os.system("pause")